SenseMyFEUP data

Data loading

Data is filtered by travelmode (car and bus) and date (April 2016).

Top 10% points

    Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
     1.0      9.0     37.0    828.1    189.0 873300.0 

ECDF

# Classifying points as day (hours 7-20 inclusive) or night for ECDF, matching the day/night filters used for df_osm_edge
df_intersession_april <- mutate(df_intersession_april,
                                class = ifelse(hour(time) >= 7 & hour(time) <= 20,
                                               "day",
                                               "night"))
df_osm_edge <- df_intersession_april %>% 
  merge(y = df_osm_edge, by = "way_id") 
ggplot(df_osm_edge, aes(points, color = class)) + 
  scale_x_log10() + stat_ecdf(geom = "step")

ECDF all by #points

rr ecdf_all ecdf_day ecdf_night

ECDF by #sessions

Map points April 2016

Showing maps

Traffic Map April 2016

Traffic day Map April 2016

Traffic night Map April 2016

Map Intersession time all 1.30h.

Map Intersession time (<1.3h) day.

m_itm_day <- m
m_itm_day

#mapshot(m_night_semsessions, file = "~/maps/top_intersessions_night_april.png")

Map Intersession time (<2h)night.

m_itm_night <- m
m_itm_night

#mapshot(m, file = "~/maps/_all_april.png")

Intersession times

Intersession time

ECDF

Lowest 35%

By date

Speeds

Speed by week

Speed by day

Speed by hour

Number of session per hour

Ways_ids per half hour

---
title: 'Sense My FEUP - April 2016 Data'
author: "Daniela S. Gil"
output: html_notebook
---

```{r echo = FALSE, eval=FALSE}
# Restore previously saved objects. The .Rda file is loaded first and the
# full workspace second, so objects sharing a name come from the workspace.
# To refresh the workspace snapshot: save.image("SensemyWorkSpace.RData")
load(file = "df_first_session_cars_april16.Rda")
load(file = "SensemyWorkSpace.RData")

```


# SenseMyFEUP data

## Data loading
Data is filtered by travel mode (car and bus) and date (April 2016).

```{r echo = FALSE, eval=FALSE}
# Choose which per-way point counts feed the rest of the notebook.
# Each assignment below overwrites df_osm_edge, so running the whole chunk
# leaves the night data in place.
# NOTE(review): presumably these are alternatives to be run one at a time -
# confirm the intended workflow.

# All points
df_osm_edge <- df_points_edge_cars_april16

# Day only
df_osm_edge <- df_points_edge_cars_april16_day

# Night only
df_osm_edge <- df_points_edge_cars_april16_night

# Unused draft: restrict first sessions to daytime start hours.
#df_ <- df_first_session_cars_april16 %>%
#  filter( HOUR(time_start) > 7, HOUR(time_start) < 20)

```


```{r echo = FALSE }
# Three variants that keep only ways with a low median intersession time and
# attach the result to the current df_osm_edge. Each variant overwrites
# df_osm_edge with a merge against its previous value.
# NOTE(review): presumably only one variant is meant to be run per session;
# running them in sequence merges onto an already-merged frame - confirm.
# NOTE(review): n() + 1 presumably turns a count of gaps into a count of
# sessions - confirm.

# Variant 1: all hours, median intersession time < 90 and more than 5 sessions
df_osm_edge <- df_intersession_april %>% 
  group_by(way_id) %>% 
  summarise(median_itm = median(intersession_time), n_session = n()+1) %>% 
  filter(median_itm < 90, n_session > 5) %>% 
  merge( y = df_osm_edge, by = "way_id") 
  
# Variant 2: daytime (hours 7 to 20 inclusive), positive gaps only, median < 90
df_osm_edge <- df_intersession_april %>% 
  filter(hour(time) >=7, hour(time) <= 20,intersession_time > 0) %>% 
  group_by(way_id) %>% 
  summarise(median_itm = median(intersession_time), n_session = n()+1) %>% 
  filter( median_itm < 90) %>% 
  merge( y = df_osm_edge, by = "way_id") 

# Variant 3: night (hour before 7 or after 20), positive gaps only, median < 120
df_osm_edge <- df_intersession_april %>% 
  filter(intersession_time > 0, hour(time) < 7 | hour(time) > 20) %>% 
  group_by(way_id) %>% 
  summarise(median_itm = median(intersession_time), n_session = n()+1) %>% 
  filter( median_itm < 120) %>% 
  merge( y = df_osm_edge, by = "way_id") 

```


```{r echo=FALSE}
# Necessary for mapping.
# Keep only the columns the map code uses, in a fixed order
# (the map loop reads the point count of each way from this frame).
df_osm_edge <- df_osm_edge %>%
  select(way_id, points, median_itm, n_session)

# Way ids to iterate over, as a plain vector. Extracting by name with [[ ]]
# always yields a vector; df_osm_edge[, 1] would return a one-column data
# frame if df_osm_edge is a tibble, and a `for` loop over that would visit
# the whole column at once instead of one way id at a time.
list_osm_edge <- df_osm_edge[["way_id"]]

```


Top 10% points
```{r echo = FALSE, eval=TRUE}
# Summary of points per way and the 90th-percentile cut-off.
summary(df_osm_edge$points)
quantile(df_osm_edge$points, probs = 0.90)

# Histogram restricted to ways strictly above the 90th percentile.
df_osm_edge %>%
  filter(points > quantile(df_osm_edge$points, probs = 0.90)) %>%
  ggplot(mapping = aes(x = points)) +
  geom_histogram()
  
  
```

## ECDF 

```{r echo = FALSE, eval = FALSE}

# Tabulate the empirical CDF of points per way at every observed value,
# in increasing order of points, and show the upper tail.
sorted_points <- sort(df_osm_edge$points)

fun.ecdf <- ecdf(df_osm_edge$points)
my.ecdf <- fun.ecdf(sorted_points)

my_ecdf_df <- data.frame(my.ecdf, points = sorted_points)
tail(my_ecdf_df, n = 400)

```

```{r echo=FALSE }
#P_all <- subset(df_osm_edge, points < quantile(df_osm_edge$points,.9))
#P_day <- subset(df_osm_edge, points < quantile(df_osm_edge$points,.9))
#P_night <- subset(df_osm_edge, points < quantile(df_osm_edge$points,.9))

#P <- ecdf(subset(df_osm_edge, points < quantile(df_osm_edge$points,.9))$points)
#plot(P, log="x", xlim=c(1, max(df_osm_edge$points)))
```

```{r}
# Classify every intersession record as "day" or "night" for the ECDF.
# Day is hour 7 to hour 20 inclusive, the same boundaries as the day and
# night filters used when building df_osm_edge, so hours 7 and 20 are no
# longer labelled "night" here while being treated as "day" there.
df_intersession_april <- df_intersession_april %>%
  mutate(class = ifelse(hour(time) >= 7 & hour(time) <= 20, "day", "night"))

# Attach the per-way columns of df_osm_edge to every classified record.
# This overwrites df_osm_edge with one row per record instead of one per way.
df_osm_edge <- df_intersession_april %>%
  merge(y = df_osm_edge, by = "way_id")

# ECDF of points per way on a log10 axis, one curve per class.
ggplot(df_osm_edge, aes(points, color = class)) +
  scale_x_log10() +
  stat_ecdf(geom = "step")
```

ECDF all by #points 
```{r  echo = FALSE, eval = TRUE }

# ECDF of points per way (log10 x axis) for the overall, day and night views,
# shown side by side.
# Titles use ggtitle(): base graphics' title() cannot be added to a ggplot
# object and fails when no base plot is open.
# NOTE(review): all three plots read the same df_osm_edge, so they are
# identical unless df_osm_edge is swapped between the assignments - confirm
# the intended workflow.
ecdf_all <- ggplot(df_osm_edge, aes(points)) +
  scale_x_log10() +
  stat_ecdf(geom = "step")

ecdf_day <- ggplot(df_osm_edge, aes(points)) +
  scale_x_log10() +
  stat_ecdf(geom = "step") +
  ggtitle("ECDF day")

ecdf_night <- ggplot(df_osm_edge, aes(points)) +
  scale_x_log10() +
  stat_ecdf(geom = "step") +
  ggtitle("ECDF night")

grid.arrange(ecdf_all, ecdf_day, ecdf_night, ncol = 3)

```

```{r}
# Print each ECDF plot individually (auto-printed, one figure per plot).
ecdf_all 
ecdf_day
ecdf_night
```


ECDF by #sessions 

```{r echo=FALSE}
# Count the records of each way, then plot the empirical CDF of that count
# on a log10 x axis.
df_intersession_april %>%
  group_by(way_id) %>%
  summarise(sessions = n()) %>%
  ggplot(mapping = aes(x = sessions)) +
  scale_x_log10() +
  stat_ecdf(geom = "step")
  
```
## Map points April 2016

```{r echo=FALSE, eval=FALSE}
# Build the base map of Porto and draw one polyline per OSM way, coloured by
# where the way's point count falls among the quantiles of df_osm_edge$points.

# Colour thresholds: quantiles of points per way.
feup <- quantile(df_osm_edge$points, 0.95)
superior <- quantile(df_osm_edge$points, 0.9)
medio <- quantile(df_osm_edge$points, 0.7)
low <- quantile(df_osm_edge$points, 0.5)

# Empty map centred on Porto.
m <- leaflet() %>% setView(lng = -8.61419, lat = 41.16311, zoom = 13)
m <- addTiles(m)
m <- addProviderTiles(m, "CartoDB.Positron")

# Iterate by row index so the way id and its point count always come from the
# same row. Previously a separate counter advanced only when a geometry was
# found, so one way missing from the database shifted the colours of every
# way after it.
# Assumes list_osm_edge is the way_id column of df_osm_edge, in row order.
for (counter in seq_along(list_osm_edge)) {

  way_id <- list_osm_edge[[counter]]

  # Geometry of the way as WKT text in EPSG:4326 (see st_astext/st_transform).
  df_way_id <- dbGetQuery(con_osm, paste0("SELECT st_astext(st_transform(way, 4326)) AS line FROM planet_osm_line WHERE planet_osm_line.osm_id = ", way_id))

  line <- as.character(df_way_id$line)

  # No geometry returned for this way: nothing to draw.
  if (length(line) == 0) {
    next
  }

  # Keep the text after the first "(" and drop the final character, leaving
  # "lon lat,lon lat,...". Only the first returned row is used.
  line <- unlist(strsplit(line, split = "(", fixed = TRUE))[2]
  line <- substr(line, 1, nchar(line) - 1)

  # Text without "(" cannot be parsed into coordinates: skip it.
  if (is.na(line)) {
    next
  }

  # Split into "lon lat" pairs and parse both columns in one pass instead of
  # growing the vectors coordinate by coordinate.
  coord_parts <- strsplit(strsplit(line, ",")[[1]], split = " ", fixed = TRUE)
  lons <- as.numeric(vapply(coord_parts, `[`, character(1), 1L))
  lats <- as.numeric(vapply(coord_parts, `[`, character(1), 2L))

  # Deciding the colour of the way from its point count. `points` is the
  # second column of df_osm_edge after the column reordering done earlier.
  n_points <- df_osm_edge$points[[counter]]

  way_colour <- if (n_points > feup) {
    "blue"
  } else if (n_points >= superior) {
    "red"
  } else if (n_points >= medio) {
    "yellow"
  } else if (n_points >= low) {
    "green"
  } else {
    NA_character_ # below the median: not drawn
  }

  if (!is.na(way_colour)) {
    m <- addPolylines(m, lons, lats, color = way_colour)
  }
}

```
### Showing maps 

Traffic  Map April 2016
```{r eval= TRUE}
# Show the map currently held in `m` (auto-printed as an HTML widget).
m 
# Optional export of the map to a standalone HTML file:
#mapshot(m, url = paste0(getwd(), "/map.html"))
```

Traffic day Map April 2016
```{r echo=FALSE}
# Keep the current map under the name m_day and display it.
# NOTE(review): this copies whatever `m` currently holds; presumably `m` is
# rebuilt from the day data before this chunk is run - confirm.
m_day <- m
m_day
```

Traffic night Map April 2016
```{r echo=FALSE}
# Keep the current map under the name m_night and display it.
# NOTE(review): this copies whatever `m` currently holds; presumably `m` is
# rebuilt from the night data before this chunk is run - confirm.
m_night <- m
m_night
```

Map of intersession time, all hours (median < 1 h 30 min).
```{r echo=FALSE}
# Keep the current map under the name m_itm and display it.
# NOTE(review): this copies whatever `m` currently holds; presumably `m` is
# rebuilt from the median-intersession-filtered data first - confirm.
m_itm <- m
m_itm
```

Map of intersession time (median < 1 h 30 min), day.
```{r}
# Keep the current map under the name m_itm_day and display it.
# NOTE(review): this copies whatever `m` currently holds; presumably `m` is
# rebuilt from the day-filtered intersession data first - confirm.
m_itm_day <- m
m_itm_day
# Disabled export. NOTE(review): m_night_semsessions is not defined in this
# notebook section - confirm the object name before re-enabling.
#mapshot(m_night_semsessions, file = "~/maps/top_intersessions_night_april.png")
```
Map of intersession time (median < 2 h), night.
```{r}
# Keep the current map under the name m_itm_night and display it.
# NOTE(review): this copies whatever `m` currently holds; presumably `m` is
# rebuilt from the night-filtered intersession data first - confirm.
m_itm_night <- m
m_itm_night
# Disabled export of the current map to a PNG file:
#mapshot(m, file = "~/maps/_all_april.png")
```


## Intersession times

```{r echo=FALSE, eval=FALSE}
# Transforming seconds to timestamp and calculating intersession time.

# `min` is interpreted as seconds since 1970-01-01.
# NOTE(review): no tz is given, so hour()/day() later use the session's local
# time zone - confirm this is intended.
df_intersession_april$time <- as.POSIXct(df_intersession_april$min, origin = "1970-01-01")

# Intersession time = minutes since the previous record on the SAME way.
# The diff is taken within each way_id: without the grouping, the first
# record of every way was compared with the last record of the preceding way,
# which produced meaningless (often negative) gaps. The first record of each
# way has no predecessor and gets 0.
df_intersession_april <- df_intersession_april %>%
  arrange(desc(way_id), time) %>%
  group_by(way_id) %>%
  mutate(intersession_time = c(0, as.numeric(diff(time), units = "mins"))) %>%
  ungroup()

# Remove Min column
# df_intersession_april$min <- NULL 
```

Intersession time  
```{r echo=FALSE}
# Summary of all intersession times, then a histogram of the strictly
# positive ones, divided by 60 for the hours axis.
summary(df_intersession_april$intersession_time)

ggplot(
  data = filter(df_intersession_april, intersession_time > 0),
  mapping = aes(x = intersession_time / 60)
) +
  geom_histogram() +
  xlab("Intersession time (hours)") +
  scale_x_continuous(breaks = seq(from = 0, to = 200, by = 20))
```


### ECDF 
```{r echo=FALSE}
# ECDF of strictly positive intersession times, on a linear axis (e1) and on
# a log10 axis (e2), shown side by side.
positive_itm <- subset(df_intersession_april, intersession_time > 0)

e1 <- ggplot(positive_itm, aes(x = intersession_time)) +
  stat_ecdf(geom = "step") +
  xlab("Intersession time")

e2 <- ggplot(positive_itm, aes(x = intersession_time)) +
  scale_x_log10() +
  stat_ecdf(geom = "step") +
  xlab("Intersession time Log")

grid.arrange(e1, e2, ncol = 2)
```


### Lowest 35% 


```{r echo=FALSE, eval=TRUE}
# Histogram of the records whose intersession time is below the 35th
# percentile of all intersession times.
df_intersession_april %>%
  filter(
    intersession_time < quantile(df_intersession_april$intersession_time, probs = 0.35)
  ) %>%
  ggplot(mapping = aes(x = intersession_time)) +
  geom_histogram(bins = 10) +
  scale_x_continuous(breaks = seq(from = 0, to = 200, by = 15)) +
  xlab("Intersession time (mins)")
```


```{r}
# Median of the strictly positive intersession times of each way, keeping
# ways whose median is below 130, shown as a 30-bin histogram.
df_intersession_april %>%
  filter(intersession_time > 0) %>%
  group_by(way_id) %>%
  summarise(
    median_itm = median(intersession_time),
    n_session = n() + 1
  ) %>%
  filter(median_itm < 130) %>%
  ggplot(mapping = aes(x = median_itm)) +
  geom_histogram(bins = 30) +
  scale_x_continuous(breaks = seq(from = 0, to = 150, by = 5))

```


## By date 

```{r echo= FALSE} 
# Number of records per day of the week.
# wday() numbers the days 1 to 7, so the axis breaks cover 1..7. The previous
# 0..23 breaks were copied from the hourly plot.
# NOTE(review): `way_ids` counts rows, not distinct way ids - confirm that
# this is the intended measure.
df_intersession_april %>%
  group_by(weekday = wday(time)) %>%
  summarise(way_ids = n()) %>%
  ggplot(aes(weekday, way_ids)) +
  geom_line() +
  scale_x_continuous(breaks = seq(1, 7, 1))

```

```{r echo= FALSE}
# Number of records per day of the month.
df_intersession_april %>%
  group_by(day = day(time)) %>%
  summarise(n = n()) %>%
  ggplot(mapping = aes(x = day, y = n)) +
  geom_line() +
  scale_x_continuous(breaks = seq(from = 1, to = 30, by = 1))

```

```{r echo= FALSE} 
# Number of records per hour of the day.
df_intersession_april %>%
  group_by(hour = hour(time)) %>%
  summarise(n = n()) %>%
  ggplot(mapping = aes(x = hour, y = n)) +
  geom_line() +
  scale_x_continuous(breaks = seq(from = 0, to = 23, by = 1))

```

## Speeds 

```{r}
# Average speed per way_id, with the number of records behind each average.
# speed * 18 / 5 multiplies by 3.6, i.e. converts m/s to km/h.
# NOTE(review): this assumes `speed` is stored in m/s - confirm.
avg_speed_wayid <- df_speed %>%
  group_by(way_id) %>%
  summarise(avg_speed = mean((speed * 18) / 5), n = n())

# The plotted values are already converted, so the axis is labelled km/h
# (it previously said m/s, contradicting the other speed charts).
ggplot(avg_speed_wayid, aes(avg_speed)) +
  geom_histogram(binwidth = 5) +
  scale_x_continuous(name = "Avg speed (km/h)", breaks = seq(0, 150, 10))
```

Speed by weekday
```{r echo=FALSE}
# Mean speed (multiplied by 18/5, labelled km/h) per day of the week.
df_speed %>%
  group_by(weekday = wday(time)) %>%
  summarise(avg_speed = mean((speed * 18) / 5)) %>%
  ggplot(mapping = aes(x = weekday, y = avg_speed)) +
  geom_line() +
  scale_x_continuous(breaks = seq(from = 0, to = 7, by = 1)) +
  ylab("Avg speed km/h")
```

Speed by day

```{r echo=FALSE}
# Mean speed (multiplied by 18/5, labelled km/h) per day of the month.
df_speed %>%
  group_by(day = day(time)) %>%
  summarise(avg_speed = mean((speed * 18) / 5)) %>%
  ggplot(mapping = aes(x = day, y = avg_speed)) +
  geom_line() +
  scale_x_continuous(breaks = seq(from = 0, to = 30, by = 1)) +
  ylab("Avg speed km/h")
```

Speed by hour
```{r echo=FALSE}
# Mean speed (multiplied by 18/5, labelled km/h) per hour of the day.
df_speed %>%
  group_by(hour = hour(time)) %>%
  summarise(avg_speed = mean((speed * 18) / 5)) %>%
  ggplot(mapping = aes(x = hour, y = avg_speed)) +
  geom_line() +
  scale_x_continuous(breaks = seq(from = 0, to = 23, by = 1)) +
  ylab("Avg speed km/h")
```

Number of sessions per hour
```{r echo=FALSE}
# Number of distinct session ids observed in each hour of the day.
df_speed %>%
  group_by(hour = hour(time)) %>%
  summarise(sessions = n_distinct(session_id)) %>%
  ggplot(mapping = aes(x = hour, y = sessions)) +
  geom_line() +
  scale_x_continuous(breaks = seq(from = 0, to = 23, by = 1))
```

Ways_ids per half hour
```{r}
# Bin the df_speed timestamps into 30-minute intervals and count the rows
# falling in each interval, then plot the counts over time.
# NOTE(review): this counts df_speed rows, not distinct way ids as the
# heading suggests - confirm which is intended.
prueba <- table(cut(df_speed$time, breaks = "30 mins"))
plot(prueba, xlab = "date", ylab = "frequency")
```








